Load Libraries and data

library(RColorBrewer)
library(DT)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## 
## The following object is masked from 'package:DT':
## 
##     JS
## 
## The following objects are masked from 'package:base':
## 
##     intersect, t
## 
## 
## Attaching package: 'Seurat'
## 
## The following object is masked from 'package:DT':
## 
##     JS
library(STRINGdb)
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## 
## Attaching package: 'SummarizedExperiment'
## 
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
library(scGraphVerse)
## Warning: replacing previous import 'igraph::components' by 'Seurat::components'
## when loading 'scGraphVerse'
library(BiocParallel)
library(GENIE3)

reticulate::use_python("/usr/bin/python3", required = TRUE)
arboreto <- reticulate::import("arboreto.algo")
pandas <- reticulate::import("pandas")
numpy <- reticulate::import("numpy")

time <- list()

ddir <- "/home/francescoc/Desktop/NodeVerse/analysis/GENIE3/data"
pdir <- "/home/francescoc/Desktop/NodeVerse/analysis/GENIE3/plots"

adjm <- as.matrix(read.table("./../analysis/adjm_top1200.txt"))
colnames(adjm) <- rownames(adjm)
count_matrices <- readRDS("./../analysis/sim_n200p500.RDS")
dim(count_matrices[[1]])
## [1] 200 554
count_matrices <- lapply(count_matrices, t)

GENIE3

Late integration

set.seed(1234)
time[["GENIE3_late_15Cores"]] <- system.time(
  genie3_late <- infer_networks(count_matrices, 
                                method="GENIE3",
                                nCores = 15)
)

Symmetrize and ROC

genie3_late_wadj <- generate_adjacency(genie3_late)
sgenie3_late_wadj <- symmetrize(genie3_late_wadj, weight_function = "mean")

plotROC(sgenie3_late_wadj, adjm, plot_title = "ROC curve - GENIE3 Late Integration", is_binary = F)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgenie3_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                                     weighted_adjm_list = sgenie3_late_wadj, 
                                     n = 3,
                                     method = "GENIE3",
                                     nCores = 15)

pscores(adjm, sgenie3_late_adj)
## Registered S3 methods overwritten by 'fmsb':
##   method    from
##   print.roc pROC
##   plot.roc  pROC

## $Statistics
##   Predicted_Matrix  TP     TN   FP  FN       TPR        FPR  Precision
## 2         Matrix 1 962 144048 7714 457 0.6779422 0.05082959 0.11088059
## 3         Matrix 2 782 143770 7992 637 0.5510923 0.05266140 0.08912697
## 4         Matrix 3 660 143830 7932 759 0.4651163 0.05226605 0.07681564
##          F1       MCC
## 2 0.1905894 0.2599064
## 3 0.1534386 0.2054872
## 4 0.1318550 0.1718900
plotg(sgenie3_late_adj)

## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]

Consensus

consesusm <- create_consensus(sgenie3_late_adj, method="vote")
consesusu <- create_consensus(sgenie3_late_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sgenie3_late_adj, weighted_list = sgenie3_late_wadj, method = "INet", threshold = 0.05, ncores = 15)
## [1] 0.3507468
## [1] 0.06983645
pscores(adjm, list(consesusm))

## $Statistics
##   Predicted_Matrix  TP     TN   FP  FN       TPR         FPR Precision
## 2         Matrix 1 821 150411 1351 598 0.5785765 0.008902097 0.3779926
##          F1       MCC
## 2 0.4572542 0.4616024
pscores(adjm, list(consesusu))

## $Statistics
##   Predicted_Matrix   TP     TN    FP  FN       TPR       FPR  Precision
## 2         Matrix 1 1251 129533 22229 168 0.8816068 0.1464728 0.05327939
##         F1       MCC
## 2 0.100486 0.1954873
pscores(adjm, list(consesunet))

## $Statistics
##   Predicted_Matrix   TP     TN    FP  FN       TPR       FPR  Precision
## 2         Matrix 1 1245 130718 21044 174 0.8773784 0.1386645 0.05585715
##          F1       MCC
## 2 0.1050278 0.2006999

Plot comparison

compare_consensus(consesusm, adjm)

compare_consensus(consesusu, adjm)

compare_consensus(consesunet, adjm)

Early integration

count_matrices <- lapply(count_matrices, as.matrix)
early_matrix <- list(earlyj(count_matrices, rowg = T))

set.seed(1234)
time[["GENIE3_early_15Cores"]] <- system.time(
  genie3_early <- infer_networks(early_matrix, method="GENIE3", nCores = 15)
)

Symmetrize and ROC

genie3_early_wadj <- generate_adjacency(genie3_early)
sgenie3_early_wadj <- symmetrize(genie3_early_wadj, weight_function = "mean")
plotROC(sgenie3_early_wadj, adjm, plot_title = "ROC curve - GENIE3 Early Integration", is_binary = F)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgenie3_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                                      weighted_adjm_list = sgenie3_early_wadj, 
                                      n = 2,
                                      method = "GENIE3",
                                      nCores = 15)

pscores(adjm, sgenie3_early_adj)

## $Statistics
##   Predicted_Matrix   TP     TN   FP  FN      TPR        FPR Precision        F1
## 2         Matrix 1 1225 143476 8286 194 0.863284 0.05459865 0.1287982 0.2241537
##         MCC
## 2 0.3210378
plotg(sgenie3_early_adj)

## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]

Plot comparison

compare_consensus(sgenie3_early_adj[[1]], adjm)

GRNBoost2

Late integration

set.seed(1234)
time[["grnboost_late"]] <- system.time(
  grnboost_late <- infer_networks(count_matrices, 
                                method="GRNBoost2",
                                nCores = 15)
)
## /home/francescoc/.local/lib/python3.8/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.
## Perhaps you already have a cluster running?
## Hosting the HTTP server on port 41023 instead
##   warnings.warn(
## /home/francescoc/.local/lib/python3.8/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.
## Perhaps you already have a cluster running?
## Hosting the HTTP server on port 42387 instead
##   warnings.warn(

Symmetrize and ROC

grnboost_late_wadj <- generate_adjacency(grnboost_late)
sgrnboost_late_wadj <- symmetrize(grnboost_late_wadj, weight_function = "mean")

grnboost_late_auc <- plotROC(sgrnboost_late_wadj, adjm, plot_title = "ROC curve - grnboost Late Integration")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgrnboost_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                                     weighted_adjm_list = sgrnboost_late_wadj, 
                                     n = 3,
                                     method = "GRNBoost2",
                                     nCores = 15)

scores.grnboost.late.all <- pscores(adjm, sgrnboost_late_adj)

plots <- plotg(sgrnboost_late_adj)

Consensus

consesusm <- create_consensus(sgrnboost_late_adj, method="vote")
consesusu <- create_consensus(sgrnboost_late_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sgrnboost_late_adj, weighted_list = sgrnboost_late_wadj, method = "INet", threshold = 0.05)
## [1] 0.3209568
## [1] 0.04876259
scores.grnboost.late <- pscores(adjm, list(consesusm))

scoresu.grnboost.late <- pscores(adjm, list(consesusu))

scoresnet.grnboost.late <- pscores(adjm, list(consesunet))

Plot comparison

ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

ajm_compared <- compare_consensus(consesunet, adjm)

Early integration

early_matrix <- list(earlyj(count_matrices))

set.seed(1234)
time[["grnboost_early"]] <- system.time(
  grnboost_early <- infer_networks(early_matrix, method="GRNBoost2")
)

Symmetrize and ROC

grnboost_early_wadj <- generate_adjacency(grnboost_early)
sgrnboost_early_wadj <- symmetrize(grnboost_early_wadj, weight_function = "mean")

plotROC(sgrnboost_early_wadj, adjm, plot_title = "ROC curve - grnboost Early Integration")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgrnboost_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                                      weighted_adjm_list = sgrnboost_early_wadj, 
                                      n = 2,
                                      method = "GRNBoost2",
                                      nCores = 15)

scores.grnboost.early <- pscores(adjm, sgrnboost_early_adj)

plots <- plotg(sgrnboost_early_adj)

Plot comparison

ajm_compared <- compare_consensus(sgrnboost_early_adj[[1]], adjm)

Joint Integration

Joint Random Forest

#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
set.seed(1234)
time[["JRF"]] <- system.time(
  jrf_mat <- infer_networks(count_matrices, method="JRF", nCores = 15)
)
 
jrf_mat[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Prepare the output

jrf_list <- list()

importance_columns <- grep("importance", names(jrf_mat[[1]]), value = TRUE)

for (i in seq_along(importance_columns)) {
# Select the 'gene1', 'gene2', and the current 'importance' column
  df <- jrf_mat[[1]][, c("gene1", "gene2", importance_columns[i])]
  
  # Rename the importance column to its original name (e.g., importance1, importance2, etc.)
  names(df)[3] <- importance_columns[i]
  
  # Add the data frame to the output list
  jrf_list[[i]] <- df
}

saveRDS(jrf_list, "./../analysis/jrf.RDS")

jrf_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

symmetrize Output and ROC

jrf_wadj <- generate_adjacency(jrf_list)
sjrf_wadj <- symmetrize(jrf_wadj, weight_function = "mean")
jrf_auc_mine <- plotROC(sjrf_wadj, adjm, plot_title = "ROC curve - JRF Late Integration", is_binary = F)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

sjrf_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF symmetrize output")
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Generate Adjacency and Apply Cutoff

sjrf_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sjrf_wadj, 
                 n = 3,
                 method = "JRF")

sjrf_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF adjacency")
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Comparison with the Ground Truth

scores.jrf.all <- pscores(adjm, sjrf_adj)

scores.jrf.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sjrf_adj)

consesusm <- create_consensus(sjrf_adj, method="vote")
consesusu <- create_consensus(sjrf_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sjrf_adj, weighted_list = sjrf_wadj, method = "INet", threshold = 0.1, ncores = 15)
## [1] 0.1786084
## [1] 0.03367249
scores.jrf <- pscores(adjm, list(consesusm))

scoresu.jrf <- pscores(adjm, list(consesusu))

scoresnet.jrf <- pscores(adjm, list(consesunet))

scores.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote")
scoresu.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores union")
scoresnet.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores INet")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

ajm_compared <- compare_consensus(consesunet, adjm)